clear all
set more off
set type double, perm
set scheme white_tableau
capture log close

global d = 0

if $d == 0 { 
global topdir "YOUR DIRECTORY"
global datadir "${topdir}/data"
global outdir "${topdir}/output"
cd "${outdir}"
}


/*  ------------------------------------------------------------------------  */
/*                         Simulate the Data                                  */
/*  ------------------------------------------------------------------------  */

global N = 10000 // simulated population size
global n = 2000 // simulated sample size to run the discrete version of Yitzhaki decomposition
global T = 10
set obs $N 
set seed 654321

scalar r_1 = 0.2 // marginal productivity of human capital
scalar r_2 = 0.21
scalar I_1 = 0.05 // optimal investment rate
scalar I_2 = 0.053

gen e1 = rnormal()/10^4 // disturbance terms
gen e2 = rnormal()/10^4
gen e3 = rnormal()/10^4
gen e4 = rnormal()/10^4	

gen H0=rbeta(5,1.5) // initial human capital distribution
gen is5pct = (_n<=0.05*${N})
sort H0
gen isbottom5pct = (_n<=0.05*${N})

// regime 1
gen H1_1=H0*(1+I_1)^$T + e1 // HC investment for 10 periods
gen lnW_1 = r_1*H1_1 + e3 // assume log-linear relationship between wages and H
gen b_1 = r_1

// regime 2a: r increases for all, with (random) 5% of population ending up at low-optimum
gen H1_2a=H0*(1+I_2)^$T + e2
replace H1_2a=H0 + e2 if is5pct==1
gen lnW_2a = r_2*H1_2a + e4
gen b_2a = r_2

// regime 2b: r increases for all, with bottom 5% of H0 distribution ending up at low-optimum 
gen H1_2b=H0*(1+I_2)^$T + e2
replace H1_2b=H0 + e2 if isbottom5pct==1
gen lnW_2b = r_2*H1_2b + e4 
gen b_2b = r_2


/*  ------------------------------------------------------------------------  */
/*                Yitzhaki using All and Unbinned Data                        */
/*  ------------------------------------------------------------------------  */

foreach x in 1 2a 2b {
quietly sum H1_`x'
local Var_`x'=_result(4)
local mu_`x'=_result(3)
}

foreach x in 1 2a 2b {
sort H1_`x'
gen rsum = sum(H1_`x')
gen w_`x' = (_n/_N) * (1/`Var_`x'') * (`mu_`x''-rsum/_n)
egen wsum_`x'=total(w_`x')
replace w_`x' = w_`x'/wsum_`x'
drop rsum wsum_`x'
}

/*  ------------------------------------------------------------------------  */
/*                            Figure 4                                        */
/*  ------------------------------------------------------------------------  */

twoway (kdensity H1_1) ///
		(kdensity H1_2a, lpattern(_)) ///
		(kdensity H1_2b, lpattern(-#-)), ///
		legend(label(1 "Regime 1") label(2 "Regime 2(a): random 5% subject to fixed cost") label(3 "Regime 2(b): bottom 5% H{sub:0} subject to fixed cost") row(3) pos(6)) title("Distribution of H{sub:1}") xtitle("H{sub:1}") graphregion(fcolor(gs16)) 
graph export "Figure4.pdf", replace
graph export "Figure4.eps", replace

/*  ------------------------------------------------------------------------  */
/*                            Figure 5                                        */
/*  ------------------------------------------------------------------------  */

twoway (scatter w_1 H1_1, msize(tiny)) ///
		(scatter w_2a H1_2a, msize(tiny)) ///
		(scatter w_2b H1_2b, msize(tiny)), ///
		legend(label(1 "Regime 1") label(2 "Regime 2(a): random 5% subject to fixed cost") label(3 "Regime 2(b): bottom 5% H{sub:0} subject to fixed cost") row(3) pos(6)) title("Yitzhaki Weights w(H{sub:1})") xtitle("H{sub:1}") graphregion(fcolor(gs16)) 
graph export "Figure5.pdf", replace
graph export "Figure5.eps", replace



/*  ------------------------------------------------------------------------  */
/*                Yitzhaki using 20% and Binned Data                          */
/*  ------------------------------------------------------------------------  */

preserve 

gen rand = runiform()
sort rand
keep if _n <= $n // keep a sample size of 2000, which is close to the actaul sample size for white men or women in each NLSY cohort
drop rand

foreach x in 1 2a 2b {

// calculate unit slopes and weights
sort H1_`x'
gen b_disc_`x' = (lnW_`x'[_n+1]-lnW_`x'[_n])/(H1_`x'[_n+1]-H1_`x'[_n]) // unit slopes
quietly sum H1_`x'
local Var_`x'=_result(4)
local mu_`x'=_result(3)
gen rsum = sum(H1_`x') // running sum of X
egen sum = total(H1_`x') // total sum of X
gen term1 = (1/`Var_`x'') * (_n/_N) * ((_N-_n)/_N)
gen term2 = (sum - rsum)/(_N-_n) - rsum/_n
gen term3 = H1_`x'[_n+1] - H1_`x'[_n]
gen w_disc_`x' = term1 * term2 * term3 // Yitzhaki weights
drop rsum sum term1 term2 term3 

// binning	
gen H1_`x'_bin = 0.01*ceil(H1_`x'/0.01) 
sort H1_`x'_bin
by H1_`x'_bin: egen ols_sumwgt_`x'_bin = total(w_disc_`x') // sum weights by bin
by H1_`x'_bin: egen ols_sumbeta_`x'_bin = total(b_disc_`x'*w_disc_`x') // calculate avg. ols estiamtes by bin
gen ols_avgbeta_`x'_bin = ols_sumbeta_`x'_bin/ols_sumwgt_`x'_bin

}

/*  ------------------------------------------------------------------------  */
/*                            Figure 6                                        */
/*  ------------------------------------------------------------------------  */

twoway (scatter ols_sumwgt_1_bin H1_1_bin, msize(small) msym(oh)) ///
		(scatter ols_sumwgt_2a_bin H1_2a_bin, msize(small) msym(t)) ///
		(scatter ols_sumwgt_2b_bin H1_2b_bin if ols_sumwgt_2b_bin<0.1, msize(vsmall) msym(d)), ///
		legend(label(1 "Regime 1") label(2 "Regime 2(a): random 5% subject to fixed cost") label(3 "Regime 2(b): bottom 5% H{sub:0} subject to fixed cost") row(3) pos(6)) title("Yitzhaki Weights w(H{sub:1})") subtitle("using a random sample and binned data") xtitle("H{sub:1} bin") graphregion(fcolor(gs16)) 
graph export "Figure6.pdf", replace
graph export "Figure6.eps", replace

/*  ------------------------------------------------------------------------  */
/*                            Figure 7                                        */
/*  ------------------------------------------------------------------------  */

twoway (scatter ols_avgbeta_1_bin H1_1_bin, msize(small) msym(oh)) ///
		(scatter ols_avgbeta_2a_bin H1_2a_bin, msize(small) msym(t)) ///
		(scatter ols_avgbeta_2b_bin H1_2b_bin, msize(vsmall) msym(d)), ///
		legend(label(1 "Regime 1") label(2 "Regime 2(a): random 5% subject to fixed cost") label(3 "Regime 2(b): bottom 5% H{sub:0} subject to fixed cost") row(3) pos(6)) title("Pairwise Slopes b(H{sub:1})") subtitle("using a random sample and binned data") xtitle("H{sub:1} bin") graphregion(fcolor(gs16))
graph export "Figure7.pdf", replace
graph export "Figure7.eps", replace


restore



exit








